home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Software Vault: The Gold Collection
/
Software Vault - The Gold Collection (American Databankers) (1993).ISO
/
cdr48
/
bpl70n12.zip
/
ARISOURC.ZIP
/
FPKER.ASM
< prev
next >
Wrap
Assembly Source File
|
1993-03-07
|
32KB
|
649 lines
; *******************************************************
; * *
; * Turbo Pascal Runtime Library Version 7.0 *
; * Real Kernel Routines (Add,Sub,Mul,Div,Sqr) *
; * *
; * Copyright (C) 1989-1993 Norbert Juffa *
; * *
; *******************************************************
TITLE FPKER
CODE SEGMENT BYTE PUBLIC
ASSUME CS:CODE
;-------------------------------------------------------------------------------
;
; Turbo Pascal REAL floating-point format
;
; 47 46 8 7 0
; +--+------------------------------+--------+
; |S | Mantissa |Exponent|
; +--+------------------------------+--------+
;
; 47 31 15 7 0
; +-------------+------------+------+--------+
; | DX | BX | AH | AL |
; +-------------+------------+------+--------+
;
; 47 31 15 7 0
; +-------------+------------+------+--------+
; | DI | SI | CH | CL |
; +-------------+------------+------+--------+
;
; value = 1^(-S) * Mantissa/2^40 * 2^(Exponent - 129)
;
;-------------------------------------------------------------------------------
; Externals
EXTRN HaltError:NEAR
; Publics
PUBLIC RealAdd,RealSub,RealMul,RealDiv
PUBLIC RealSqr,RealSqrNoChk,RealDivRev
PUBLIC RealMulNoChk,RealMulNChk2
PUBLIC RAdd,RSub,RMul,RDiv,RSqr,ROverflow
;-------------------------------------------------------------------------------
; RealAdd and RealSub are the routines for adding and subtracting two numbers
; in the Turbo Pascal 6 byte floating point format. They are practically ident-
; ical, since subtraction is implemented as an addition with a negated second
; addend. If underflow occurs, zero is returned. On overflow the carry flag
; will be set. The rounding of these routines complies with the IEEE "round to
; nearest or even" mode. Guard and sticky flags are therefore fully implemented.
;
; INPUT: DX:BX:AX first addend
; DI:SI:CX second addend
;
; OUTPUT: DX:BX:AX sum
; CF set if overflow occured, else cleared
;
; DESTROYS: AX,BX,CX,DX,SI,DI,Flags
;-------------------------------------------------------------------------------
AddExt PROC NEAR
$ret_second: XCHG AX, CX ; load second addend
MOV BX, SI ; into DX:BX:AX (DX currently loaded)
RET ; done
AddExt ENDP
RealSub PROC NEAR
XOR DI, 8000h ; negate second argument
RealSub ENDP
RealAdd PROC NEAR
CMP CL, AL ; second addend bigger ?
JAE $bigger ; yes
XCHG AX, CX ; no,
XCHG BX, SI ; exchange
XCHG DX, DI ; addends
$bigger: XCHG DX, DI ; DX = msb of second addend
NEG AL ; smaller addend zero ?
JZ $ret_second ; yes, return other addend
ADD AL, CL ; compute difference of exponents
CMP AL, 41 ; difference too big ?
JA $ret_second ; yes, add/sub will not change bigger arg
PUSH BP ; save TURBO-Pascal frame pointer
MOV BP, 0FF00h ; load mask for msb
AND BP, CX ; save msb of second addend
MOV CH, 80h ; mask for sign bit
AND CH, DH ; sign bit of second addend
PUSH CX ; save sign and exponent
XOR CX, DI ; test if operands have different sign
PUSHF ; save sign indicator
OR DH, 80h ; set implicit bit in second addend
XCHG DX, DI ; DX = msb of first addend
OR DH, 80h ; set implicit bit in first addend
XOR CX, CX ; set guard and sticky bytes to 0
XCHG AL, CH ; DX:BX:AX = mantissa, CH = shift counter
$test_shift: CMP CH, 4 ; less than 4 bit shifts necessary ?
JB $bit_shift ; yes, do it one bit at a time
CMP CH, 8 ; between 4 and 7 bit shifts necessary ?
JB $4bit_shift ; yes, do 4 bit shift first
CMP CH, 16 ; between 8 and 15 bit shifts necessary ?
JB $byte_shift ; yes, do byte shift first
OR CL, AL ; accumulate
OR CL, AH ; sticky byte
XCHG AX, DX ; shift
XCHG AX, BX ; mantissa 16 bits
XOR DX, DX ; to the right
SUB CH, 16 ; decrement shift counter by 16
JMP $test_shift ; test remaining shifts
$byte_shift: OR CL, AL ; accumulate sticky byte
MOV AL, AH ; shift
MOV AH, BL ; mantissa
MOV BL, BH ; eight
MOV BH, DL ; bits
MOV DL, DH ; to the
XOR DH, DH ; right
TEST CH, 4 ; 4 bit shift possible ?
JZ $bit_shift ; no, try single bit shifts
$4bit_shift: NEG CL ; set sticky flag = FFh
SBB CL, CL ; if <> 0 before
OR CL, AL ; accumulate
AND CL, 0Fh ; sticky flag
SHR DX, 1 ; shift
RCR BX, 1 ; mantissa
RCR AX, 1 ; 1 bit to the right
SHR DX, 1 ; shift
RCR BX, 1 ; mantissa
RCR AX, 1 ; 1 bit to the right
SHR DX, 1 ; shift
RCR BX, 1 ; mantissa
RCR AX, 1 ; 1 bit to the right
SHR DX, 1 ; shift
RCR BX, 1 ; mantissa
RCR AX, 1 ; 1 bit to the right
$bit_shift: AND CH, 3 ; compute number of single bit shifts
JZ $shift_done ; no shifts necessary, mantissas aligned
NEG CL ; set sticky flag to FFh
SBB CL, CL ; if <> 0 before
ALIGN 4
$bit_loop: SHR DX, 1 ; shift
RCR BX, 1 ; mantissa
RCR AX, 1 ; 1 bit to the right
ADC CL, CL ; accumulate sticky byte
DEC CH ; decrement shift counter
JNZ $bit_loop ; until shift counter zero
$shift_done: POPF ; signs of addends different ?
JS $subtract ; sign of addends differ
ADD AX, BP ; add
ADC BX, SI ; mantissas
ADC DX, DI ; of two addends
MOV BP, CX ; get sticky byte
POP CX ; get exponent and sign
JNC $no_overf ; no mantissa overflow
SHR DX, 1 ; divide
RCR BX, 1 ; mantissa
RCR AX, 1 ; by two
INC CX ; adjust exponent
$no_overf: DEC CX ; exponent-1
JMP $add_sub_end ; do rounding
$ret_first: POP BP ; restore TURBO-Pascal frame pointer
RET ; done
$subtract: XCHG AX, BP ; exchange
XCHG BX, SI ; addends
XCHG DX, DI ; for correct order
NEG CX ; set carry if sticky byte <> 0
SBB AX, BP ; subtract
SBB BX, SI ; the two
SBB DX, DI ; mantissas
MOV BP, CX ; get sticky byte
POP CX ; get exponent and sign of result
JNC $no_negate ; no negative result
XOR CH, 80h ; result has other sign than 2. addend
NOT DX ; negate
NOT BX ; number
NEG AX ; in
SBB BX, -1 ; DX:BX:AX
SBB DX, -1 ; "
$no_negate: JS $no_overf ; mantissa normalized
JZ $test_z1 ; first mantissa word is zero
ALIGN 4
$shift_l: DEC CX ; adjust exponent
ADD AX, AX ; multiply
ADC BX, BX ; mantissa
ADC DX, DX ; by two
JNS $shift_l ; normalized? no
DEC CX ; exponent-1
JMP $add_sub_end ; do rounding
$test_z1: XCHG BX, AX ; do a 16-bit
XCHG DX, AX ; left shift of the mantissa
SUB CX, 16 ; adjust exponent
OR DX, DX ; first mantissa word zero?
JG $shift_l ; not zero, no sign
JS $no_overf ; mantissa normalized
XCHG DX, BX ; shift mantissa 16 bits left (AX=0)
SUB CX, 16 ; adjust exponent
OR DX, DX ; first mantissa word zero ?
JG $shift_l ; not zero, no sign
JS $no_overf ; mantissa normalized
POP BP ; mantissa zero, return DX:BX:AX=0
RET ; done
RealAdd ENDP
;-------------------------------------------------------------------------------
; RealMul multiplies two numbers in the Turbo Pascal 6 byte floating point
; format. If underflow occurs, zero is returned. On overflow the carry flag
; will be set. The routine multiplies the mantissas by computing nine partial
; products using the 80x86 MUL instruction. RealMulNoChk is the same routine
; as RealMul but does not check the operand in DI:SI:CX for zero. The fastest
; multiplication routine, RealMulNChk2, does not check either operand for zero.
; The rounding of this routine complies with IEEE "round to nearest or even"
; mode. For this purpose, guard and sticky flags are implemented.
;
; INPUT: DX:BX:AX multiplicand
; DI:SI:CX multiplicator
;
; OUTPUT: DX:BX:AX product
; CF set if overflow occured, else cleared
;
; DESTROYS: AX,BX,CX,DX,SI,DI,Flags
;-------------------------------------------------------------------------------
ALIGN 4
RealMul PROC NEAR
OR CL, CL ; multiplicator = 0 ?
JZ $zero_res ; result will be 0
RealMulNoChk PROC NEAR
OR AL, AL ; multiplicand = 0 ?
JZ $zero_res ; result is zero
RealMulNChk2 PROC NEAR
PUSH BP ; save TURBO-framepointer
XCHG BX, DI ; BX = b1, DI = a2
MOV BP, DX ; get sign of multiplicant
XOR BP, BX ; compute sign of result
AND BP, 8000h ; mask out sign bit
XCHG AL, CH ; save b3
ADD CL, CH ; sum of biased exponents
SBB CH, CH ; clear msb
NEG CH ; and put possible overflow in CH
OR CX, BP ; zap in sign bit
PUSH CX ; save new exponent and sign bit
XOR CX, CX ; clear lo-bytes of a3 and b3
OR DH, 80h ; set implicit bit of multipicand
OR BH, 80h ; set implicit bit of multiplicator
OR SI, SI ; b2 = 0 ?
JZ $test_short ; yes, test if b3 = 0
OR DI, DI ; a2 = 0 ?
JNZ $full_mult ; no, use full multiplication
OR AH, AH ; a3 = 0 ?
JNZ $full_mult ; no, use full multiplication
XCHG AH, AL ; swap a3 and b3
XCHG DI, SI ; swap a2 and b2
XCHG DX, BX ; swap a1 and b1
$test_short: OR AL, AL ; b3 = 0 ?
JNZ $full_mult ; no, use full multiplication
MOV SI, DX ; save a1
MUL BX ; b1 * a3
MOV BP, AX ; generate sticky byte = 0
XCHG AX, DX ; AX = msw of product
XCHG AX, DI ; save msw of product, get a2
MUL BX ; b1 * a2
XCHG AX, BX ; save lsw of product, get b1
XCHG DX, SI ; save msw of product, get a1
ADD BX, DI ; add product
ADC SI, CX ; to FPA
MUL DX ; b1 * a1
ADD AX, SI ; add product
ADC DX, CX ; result in DX:AX:BX
JMP $end_mantiss ; handle exponent
$zero_res: JMP $zero_prod2 ; result is 0
ALIGN 4
$full_mult: PUSH BX ; save b1
PUSH DI ; save a2
PUSH SI ; save b2
PUSH DX ; save a1
PUSH BX ; save b1
MOV BX, CX ; clear most significant word of FPA
XCHG AL, CH ; CH = b3, AL = 0
MOV BP, AX ; a3
MOV AL, CH ; b3
MUL AH ; a3 * b3
XCHG AX, DI ; store to FPA, get a2
MUL CX ; a2 * b3
ADD DI, AX ; add result
ADC DX, BX ; to FPA
XCHG AX, DX ; and
XCHG AX, SI ; get b2
MUL BP ; a3 * b2
ADD DI, AX ; add result
ADC SI, DX ; to
ADC BX, BX ; FPA
XCHG AX, BP ; get a3
MOV BP, DI ; generate sticky flag
XOR DI, DI ; FPA = DI:BX:SI
POP DX ; get b1
MUL DX ; a3 * b1
ADD SI, AX ; add result to
ADC BX, DX ; FPA, no overflow possible
XCHG AX, CX ; b3
POP CX ; a1
MUL CX ; a1 * b3
ADD SI, AX ; add
ADC BX, DX ; result to
ADC DI, DI ; FPA
POP AX ; b2
POP DX ; a2
PUSH DX ; save a2
PUSH AX ; save b2
MUL DX ; a2 * b2
ADD SI, AX ; add
ADC BX, DX ; result
ADC DI, 0 ; to FPA
OR BP, SI ; accumulate sticky flag
XOR SI, SI ; FPA = SI:DI:BX
POP AX ; b2
MUL CX ; a1 * b2
ADD BX, AX ; add
ADC DI, DX ; result
ADC SI, SI ; to FPA
POP AX ; a2
POP DX ; get b1
PUSH DX ; save b1
MUL DX ; a2 * b1
ADD BX, AX ; add -------+
POP AX ; get b1 !
$sqr_end: ADC DI, DX ; result <-+
ADC SI, 0 ; to FPA
MUL CX ; a1 * b1
ADD AX, DI ; add result to FPA
ADC DX, SI ; DX:AX:BX = result
$end_mantiss:POP CX ; CH = exponent CL = sign
XCHG AX, BX ; DX:BX:AX = result
; SUB CX, 81h ; compute new exponent-1
$div_end: ; OR DX, DX ; is mantissa normalized ?
; JS $add_sub_end ; yes
js $$1
ADD AX, AX ; no, shift
ADC BX, BX ; FPA 1 bit
ADC DX, DX ; to the left
DEC CX ; adjust exponent
$$1: sub cx, 81h
$add_sub_end:ADD AX, 80h ; round
ADC BX, 0 ; up
ADC DX, 0 ; mantissa
ADC CX, 0 ; increment exponent if mantissa overfl.
OR AL, AL ; tie case ?
JZ $tie_case ; tie case possible if sticky = 0, too
$round_done: POP BP ; restore caller's frame pointer
TEST CH, 40H ; test if (exponent-1) negative
JNZ $zero_prod2 ; yes, underflow, return zero
INC CX ; new exponent
MOV AL, CL ; store exponent
AND DH, 7Fh ; force MSB of mantissa to 0
OR DH, CH ; fill in sign bit
IFDEF NOOVERFLOW
ROR CH, 1 ; test if exponent overflow
ROL CH, 1 ; restore sign flag
ELSE
SHR CH, 1 ; test if exponent overflow (> FFh)
ENDIF
RET ; done
$zero_prod2: XOR AX, AX ; load
MOV BX, AX ; a
CWD ; zero
RET ; done
$tie_case: OR BP, BP ; sticky flag = 0 (tie case) ?
JNZ $round_done ; no, round up was correct
AND AH, 0FEh ; tie case, make mantissa even
JMP $round_done ; IEEE rounding done
RealMulNChk2 ENDP
RealMulNoChk ENDP
RealMul ENDP
;-------------------------------------------------------------------------------
; RealSqr computes the square of a number in the Turbo Pascal 6-byte floating
; point format. If underflow occurs, zero is returned. On overflow the carry
; flag will be set. Since squaring allows for some optimizations in code when
; compared with normal multiplication, RealSqr is implemented as a self con-
; tained routine and not as a call to RealMul. The routine exits thru RealMul.
; RealSqrNoChk does not check the argument for zero before squaring. Rounding
; complies with the IEEE "round to nearest or even" mode, so guard and sticky
; flags are provided.
;
; INPUT: DX:BX:AX argument
;
; OUTPUT: DX:BX:AX square of argument
; CF set if overflow occured, else cleared
;
; DESTROYS: AX,BX,CX,DX,SI,DI,Flags
;-------------------------------------------------------------------------------
RealSqr PROC NEAR
OR AL, AL ; argument = 0 ?
JZ $zero_prod2 ; result is zero
RealSqrNoChk PROC NEAR
XOR CX, CX ; clear register
XCHG CL, AL ; exponent in CL, AL = 0
ADD CX, CX ; new exponent, sign always positive (0)
PUSH BP ; save TURBO-Pascal frame pointer
PUSH CX ; save sign and exponent
OR DH, 80h ; set implicit bit of argument
MOV SI, AX ; a2 and
OR SI, BX ; a3 = 0 ?
JNZ $full_sqr ; no, do full multiplication
MOV AX, DX ; load a1
MUL DX ; a1 * a1
or dx, dx
JMP $end_mantiss ; result in DX:AX:BX
ALIGN 2
$full_sqr: PUSH BX ; save a2
XOR DI, DI ; load zero
MOV CX, DX ; save a1
MOV BP, AX ; save a3
MOV AL, AH ; load a3
MUL AL ; a3 * a3
XCHG AX, BX ; save product, get a2
MUL BP ; a2 * a3
XCHG AX, BP ; get a3, BP = save lo-word a2*a3
MOV SI, DX ; save hi-word a2*a3
ADD BX, BP ; add a3*a3 to
ADC SI, DI ; a2*a3 (result in SI:BX, no overflow)
ADD BP, BX ; add a2*a3 lo-word to result
MOV BX, DI ; BX = 0
ADC SI, DX ; add a2*a3 hi-word
ADC DI, DI ; to result (DI:SI:BP)
XCHG DI, BX ; FPA = DI:BX:SI, BP = sticky byte
MUL CX ; a1 * a3
ADD SI, AX ; add product
ADC BX, DX ; to FPA (no overflow possible)
ADD SI, AX ; add
ADC BX, DX ; product to
ADC DI, DI ; FPA another time
POP AX ; get a2
PUSH AX ; save a2
MUL AX ; a2 * a2
ADD SI, AX ; add
ADC BX, DX ; product to
ADC DI, 0 ; FPA
OR BP, SI ; accumulate sticky byte
XOR SI, SI ; FPA = SI:DI:BX
POP AX ; get a2
MUL CX ; a1 * a2
ADD BX, AX ; add
ADC DI, DX ; resulting
ADC SI, SI ; product
ADD BX, AX ; to FPA twice
MOV AX, CX ; AX = CX = a1
JMP $sqr_end ; exit thru REAL_MUL
RealSqrNoChk ENDP
RealSqr ENDP
;-------------------------------------------------------------------------------
; RealDiv divides two numbers in the Turbo Pascal 6 byte floating point
; format. If underflow occurs, zero is returned. On overflow the carry flag
; will be set. The routine exits through the REAL_MUL routine. It makes use
; of the 80x86 DIV instruction in an estimate and correct algorithm. In each
; of the three steps, an estimation of a part of the quotient is produced by
; dividing the first 32 bits of the current remainder by the first 16 bits of
; the divisor using a machine instruction. Then the divisor is multiplied by
; the result and this product subtracted from the current remainder. If the sum
; is negative, the partial quotient must be decremented until the new remainder
; is positive. RealDivRev is an additional routine which exchanges the operands
; before performing the division. The rounding provided complies with IEEE
; "round to nearest or even" mode. For this purpose, guard and sticky flags
; are implemented.
;
; INPUT: DX:BX:AX dividend
; DI:SI:CX divisor
;
; OUTPUT: DX:BX:AX quotient
; CF set if overflow occured, else cleared
;
; DESTROYS: AX,BX,CX,DX,SI,DI,Flags
;-------------------------------------------------------------------------------
RealDivRev PROC NEAR
XCHG AX, CX ; exchange
XCHG BX, SI ; divisor and
XCHG DX, DI ; dividend
RealDivRev ENDP
RealDiv PROC NEAR
OR AL, AL ; dividend = 0 ?
JZ $zero_prod2 ; result is zero
PUSH BP ; save TURBO-Pascal framepointer
MOV BP, DX ; get msw of dividend
XOR BP, DI ; xor with msw of divisor to make sign
AND BP, 8000h ; isolate sign bit of result
OR DH, 80h ; set implicit bit in dividend
XCHG DX, DI ; DX = divisor msw, DI = dividend msw
OR DH, 80h ; set implicit bit in divisor
SUB AL, CL ; subtract exponents ----------+
MOV CL, 0 ; clear lsb of divisor lsw |
PUSH SI ; save divisor middle word |
PUSH CX ; and lsw on stack |
MOV CX, BP ; get sign |
XCHG AL, CL ; AL = 0, CL = new exponent |
SBB CH, AL ; put carry here <-------------+
add cx,101h
MOV BP, SP ; access divisor on stack via BP
SUB BP, 6 ; leave room for three pushes
SHR DI, 1 ; divide dividend
RCR BX, 1 ; by 2 to prevent
RCR AX, 1 ; an overflow condition
ALIGN 4
$divide_loop:PUSH CX ; save sign & exponent resp. part. quot.
MOV CX, DX ; get msw of divisor
XCHG AX, BX ; create new dividend
XCHG AX, DI ; by shifting remainder
XCHG AX, SI ; 16 bits to the left
CMP CX, SI ; overflow possible on division ?
JE $div_overfl ; yes
MOV DX, SI ; get msw of dividend
XCHG AX, DI ; second word of dividend
DIV CX ; compute partial quotient
XOR SI, SI ; subtract product of divisor high word
MOV DI, DX ; and partial quotient from dividend
$comp_rem: XCHG AX, CX ; AX = divisor high word, CX = quotient
PUSH AX ; save divisor high word
MOV AX, [BP+8] ; get middle word of divisor
MUL CX ; multiply by partial quotient
SUB BX, AX ; subtract the product of
SBB DI, DX ; divisor middle word and partial
SBB SI, 0 ; quotient from dividend
MOV AX, [BP+6] ; get lsw of divisor
MUL CX ; multiply by partial quotient
NEG AX ; subtract the product
SBB BX, DX ; of divisor LSW
SBB DI, 0 ; and partial
SBB SI, 0 ; quotient from dividend
POP DX ; get back msw of divisor
JZ $sub_ok ; remainder must be positive
$add_twice: DEC CX ; quotient to high, decrement it
ADD AX, [BP+6] ; adjust
ADC BX, [BP+8] ; quotient and
ADC DI, DX ; remainder
JNC $add_twice ; until remainder positive
$sub_ok: CMP BP, SP ; two partial quotients saved already ?
JNE $divide_loop ; no, continue (carry set !!!)
MOV BP, AX ; accumulate
OR BP, BX ; sticky
OR BP, DI ; byte
XCHG AX, CX ; get last partial quotient
POP BX ; get other
POP DX ; partial quotients
POP CX ; get sign and exponent
ADD SP, 4 ; remove saved divisor from stack
or dx, dx
JMP $div_end ; normalize mantissa and round
$div_overfl: XOR SI, SI ; remainder - 10000h * divisor
ADD DI, CX ; remainder -
ADC SI, SI ; FFFFh * divisor
MOV AX, -1 ; quotient = FFFFh
JMP $comp_rem ; continue computation of remainder
RealDiv ENDP
ALIGN 4
RAdd PROC FAR
CALL RealAdd ; perform addition
JC ROverflow ; overflow error
RET ; done
RAdd ENDP
ALIGN 4
RSub PROC FAR
CALL RealSub ; perform subtraction
JC ROverflow ; overflow error
RET ; done
RSub ENDP
ALIGN 4
RSqr PROC FAR
CALL RealSqr
JC ROverflow
RET
RSqr ENDP
ALIGN 4
RMul PROC FAR
CALL RealMul ; perform multiplication
JC ROverflow ; overflow error
RET ; done
RMul ENDP
ALIGN 4
RDiv PROC FAR
OR CL, CL ; divisor zero ?
JZ RDivZero ; yes, error
CALL RealDiv ; perform division
JC ROverflow ; overflow error
RET ; done
RDiv ENDP
ROverflow: MOV AX, 0CDh ; error code 205 (fp overflow)
JMP HaltError ; execute error handler
RDivZero: MOV AX, 0C8h ; error code 200 (division by zero)
JMP HaltError ; execute error handler
ALIGN 4
CODE ENDS
END